	; /* ------------------------------------------------------------------ */
	; /* Prologue and register setup; falls through into main_loop_ryzen.    */
	; /*                                                                     */
	; /* Comment convention: every comment is written as ";" followed by a   */
	; /* C-style block comment, so it is ignored by MASM (";" starts a       */
	; /* comment) and by GAS/cpp (";" is an empty statement, the C comment   */
	; /* is stripped).                                                       */
	; /*                                                                     */
	; /* In:  rcx = pointer to a parameter block read at offsets 0..104 and  */
	; /*            224. NOTE(review): presumably the first argument under   */
	; /*            the Microsoft x64 convention, since rdi/rsi/xmm6-xmm8    */
	; /*            are preserved and slots above the entry rsp are used as  */
	; /*            save area - confirm against the including wrapper.       */
	; /*      rsp = must be 8 modulo 16 on entry: 5 pushes (40 bytes) plus   */
	; /*            the 64-byte scratch area make rsp 16-byte aligned for    */
	; /*            the movaps spills below.                                 */
	; /*                                                                     */
	; /* Scratch area after "sub rsp, 64":                                   */
	; /*      [rsp+0]  saved MXCSR        [rsp+4]  temporary new MXCSR       */
	; /*      [rsp+16] saved xmm8         [rsp+32] saved xmm7                */
	; /*      [rsp+48] saved xmm6                                            */
	; /*                                                                     */
	; /* Out (state handed to the loop):                                     */
	; /*      rbx  = qword [rcx+224], base address indexed by the loop       */
	; /*      ebp  = 524288 (0x80000), iteration counter                     */
	; /*      r8   = [rcx+32] xor [rcx+0]                                    */
	; /*      r11  = [rcx+40] xor [rcx+8]                                    */
	; /*      r10  = r8 and 0x1FFFF0, first 16-byte-aligned offset           */
	; /*      xmm3 = { [rcx+48] xor [rcx+16], [rcx+56] xor [rcx+24] }        */
	; /*      xmm4 = { [rcx+80] xor [rcx+64], [rcx+88] xor [rcx+72] }        */
	; /*      rdi  = qword [rcx+104],  r15 = qword [rcx+96]                  */
	; /*      xmm7 = 1023 << 52 in the low qword, high qword zero            */
	; /*      xmm8 = 0                                                       */
	; /* ------------------------------------------------------------------ */

	; /* Save rbx/rbp/rsi in the three qword slots above the entry rsp; the  */
	; /* epilogue reloads them from the same addresses.                      */
	mov	QWORD PTR [rsp+16], rbx
	mov	QWORD PTR [rsp+24], rbp
	mov	QWORD PTR [rsp+32], rsi
	push	rdi
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp, 64

	; /* Save the caller's MXCSR and load 24448 = 0x5F80: exception mask     */
	; /* bits 7..12 set and rounding-control bits 13..14 = 10b (round toward */
	; /* +infinity). The loop's sqrtsd depends on this rounding mode.        */
	stmxcsr DWORD PTR [rsp]
	mov DWORD PTR [rsp+4], 24448
	ldmxcsr DWORD PTR [rsp+4]

	; /* The loads below are interleaved; r9 keeps the parameter pointer     */
	; /* because rcx itself is overwritten further down.                     */
	mov	rax, QWORD PTR [rcx+48]
	mov	r9, rcx
	xor	rax, QWORD PTR [rcx+16]
	mov	ebp, 524288
	mov	r8, QWORD PTR [rcx+32]
	xor	r8, QWORD PTR [rcx]
	mov	r11, QWORD PTR [rcx+40]
	mov	r10, r8
	mov	rdx, QWORD PTR [rcx+56]
	movq	xmm3, rax
	xor	rdx, QWORD PTR [rcx+24]
	xor	r11, QWORD PTR [rcx+8]
	mov	rbx, QWORD PTR [rcx+224]
	mov	rax, QWORD PTR [r9+80]
	xor	rax, QWORD PTR [r9+64]
	movq	xmm0, rdx
	; /* rcx is consumed here; from now on only r9 addresses the block.      */
	mov	rcx, QWORD PTR [rcx+88]
	xor	rcx, QWORD PTR [r9+72]
	mov	rdi, QWORD PTR [r9+104]
	; /* 2097136 = 0x1FFFF0: keep the offset 16-byte aligned and below 2 MiB */
	and	r10d, 2097136
	; /* Spill xmm6-xmm8 before they are overwritten (restored in epilogue). */
	movaps	XMMWORD PTR [rsp+48], xmm6
	movq	xmm4, rax
	movaps	XMMWORD PTR [rsp+32], xmm7
	movaps	XMMWORD PTR [rsp+16], xmm8
	xorps	xmm8, xmm8
	; /* xmm7 = 1023 << 52 (exponent field 1023 of an IEEE-754 double).      */
	; /* A 32-bit move is used instead of "mov ax, 1023": it avoids the      */
	; /* 16-bit operand-size prefix and the partial write to rax, and the    */
	; /* result no longer relies on the shift discarding stale upper bits.   */
	mov eax, 1023
	shl rax, 52
	movq xmm7, rax
	mov	r15, QWORD PTR [r9+96]
	; /* Combine the low/high halves prepared above into xmm3 and xmm4.      */
	punpcklqdq xmm3, xmm0
	movq	xmm0, rcx
	punpcklqdq xmm4, xmm0

	; /* ------------------------------------------------------------------ */
	; /* Main loop; runs ebp times. NOTE(review): label names suggest this   */
	; /* is the CryptoNight variant 2 inner loop tuned for Ryzen - confirm.  */
	; /*                                                                     */
	; /* State carried from one iteration to the next:                       */
	; /*   rbx      base address of the memory region being read/written     */
	; /*   r10      current offset into that region (masked with 0x1FFFF0)   */
	; /*   r8, r11  low / high qword of a 128-bit running value              */
	; /*   xmm3     the xmm5 value of the previous iteration                 */
	; /*   xmm4     the xmm3 value of the previous iteration                 */
	; /*   rdi      square-root result of the previous iteration             */
	; /*   r15      division result of the previous iteration                */
	; /*   xmm7     1023 << 52 (constant), xmm8 zero (constant)              */
	; /*   ebp      iterations remaining                                     */
	; /* ------------------------------------------------------------------ */
	ALIGN(64)
main_loop_ryzen:
	; /* xmm5 = 16 bytes at [rbx+r10]; xmm6 = { r8, r11 } is the AES key.    */
	movdqa	xmm5, XMMWORD PTR [r10+rbx]
	movq	xmm0, r11
	movq	xmm6, r8
	punpcklqdq xmm6, xmm0
	; /* rdx remembers the address just read; it is written back below.      */
	lea	rdx, QWORD PTR [r10+rbx]
	; /* r9 = 2 * previous sqrt result (used for the divisor);               */
	; /* rdi = previous sqrt result << 32 (xored into rsi below).            */
	lea	r9, QWORD PTR [rdi+rdi]
	shl	rdi, 32

	; /* ecx / eax / r10d = offset xor 16 / 32 / 48: the other three 16-byte */
	; /* chunks of the same 64-byte group.                                   */
	mov	ecx, r10d
	mov	eax, r10d
	xor	ecx, 16
	xor	eax, 32
	xor	r10d, 48
	; /* One AES encryption round on xmm5 with xmm6 as round key.            */
	aesenc	xmm5, xmm6
	; /* Rotate the three chunks while adding (per 64-bit lane):             */
	; /*   [off^16] = old[off^48] + xmm4                                     */
	; /*   [off^32] = old[off^16] + xmm3                                     */
	; /*   [off^48] = old[off^32] + xmm6                                     */
	movdqa	xmm2, XMMWORD PTR [rcx+rbx]
	movdqa	xmm1, XMMWORD PTR [rax+rbx]
	movdqa	xmm0, XMMWORD PTR [r10+rbx]
	paddq	xmm2, xmm3
	paddq	xmm1, xmm6
	paddq	xmm0, xmm4
	movdqa	XMMWORD PTR [rcx+rbx], xmm0
	movdqa	XMMWORD PTR [rax+rbx], xmm2
	movdqa	XMMWORD PTR [r10+rbx], xmm1

	; /* NOTE(review): xmm1 is completely overwritten by "movq xmm1, rdx"    */
	; /* after the div below before it is read again, so this copy looks     */
	; /* redundant - confirm before removing.                                */
	movaps	xmm1, xmm8
	; /* rsi = previous division result xor (previous sqrt result << 32).    */
	mov	rsi, r15
	xor	rsi, rdi
	; /* r14 = low qword of the AES output.                                  */
	movq	r14, xmm5
	; /* Write (AES output xor xmm3) back to the address that was read.      */
	movdqa	xmm0, xmm5
	pxor	xmm0, xmm3
	; /* Second offset of this iteration, derived from the AES output.       */
	mov	r10, r14
	and	r10d, 2097136
	movdqa	XMMWORD PTR [rdx], xmm0
	; /* rsi ^= low qword at the second address; r12 keeps that address and  */
	; /* r13 the original high qword, both needed at the end of the loop.    */
	xor	rsi, QWORD PTR [r10+rbx]
	lea	r12, QWORD PTR [r10+rbx]
	mov	r13, QWORD PTR [r10+rbx+8]

	; /* Divisor: low 32 bits of (2 * sqrt result + r14), with bits 31 and 0 */
	; /* forced on (-2147483647 = 0x80000001 as a dword). The 32-bit ops     */
	; /* zero-extend r9, so the divisor is odd, non-zero and below 2^32.     */
	add	r9d, r14d
	or	r9d, -2147483647
	; /* Dividend rdx:rax = 0 : high qword of the AES output. With rdx = 0   */
	; /* the quotient always fits in rax, so div cannot fault on overflow.   */
	xor	edx, edx
	movdqa	xmm0, xmm5
	psrldq	xmm0, 8
	movq	rax, xmm0

	div	r9
	; /* r15 = (quotient low dword) | (remainder low dword << 32).           */
	movq xmm0, rax
	movq xmm1, rdx
	punpckldq xmm0, xmm1
	movq r15, xmm0
	; /* Low qword of xmm0 = r15 + low qword of the AES output: the square   */
	; /* root input. xmm2 keeps a copy for the fix-up path.                  */
	paddq xmm0, xmm5
	movdqa xmm2, xmm0
	; /* Form a double bit pattern: top 52 bits of the input as mantissa,    */
	; /* exponent field 1023 (from xmm7); then take its square root.         */
	psrlq xmm0, 12
	paddq	xmm0, xmm7
	sqrtsd	xmm1, xmm0
	; /* rdi = raw bit pattern of the result. If its low 19 bits are all     */
	; /* zero (524287 = 0x7FFFF) take the fix-up path, otherwise drop them.  */
	movq	rdi, xmm1
	test	rdi, 524287
	je	sqrt_fixup_ryzen
	shr	rdi, 19

	; /* Both square-root paths rejoin here with the result in rdi.          */
sqrt_fixup_ryzen_ret:
	; /* rdx:rax = rsi * r14 (unsigned 64x64 -> 128);                        */
	; /* xmm0 = { rdx (high half), rax (low half) }.                         */
	mov	rax, rsi
	mul	r14
	movq xmm1, rax
	movq xmm0, rdx
	punpcklqdq xmm0, xmm1

	; /* r9d / ecx / r10d = second offset xor 16 / 32 / 48.                  */
	mov	r9d, r10d
	mov	ecx, r10d
	xor	r9d, 16
	xor	ecx, 32
	xor	r10d, 48
	movdqa	xmm1, XMMWORD PTR [rcx+rbx]
	; /* Fold the original chunk at off^32 into the product halves.          */
	xor rdx, [rcx+rbx]
	xor rax, [rcx+rbx+8]
	; /* Same rotation as in the first half, with the product xored in:      */
	; /*   [off^16] = old[off^48] + xmm4                                     */
	; /*   [off^32] = (old[off^16] xor xmm0) + xmm3                          */
	; /*   [off^48] = old[off^32] + xmm6                                     */
	movdqa	xmm2, XMMWORD PTR [r9+rbx]
	pxor xmm2, xmm0
	paddq xmm4, XMMWORD PTR [r10+rbx]
	paddq	xmm2, xmm3
	paddq	xmm1, xmm6
	movdqa	XMMWORD PTR [r9+rbx], xmm4
	movdqa	XMMWORD PTR [rcx+rbx], xmm2
	movdqa	XMMWORD PTR [r10+rbx], xmm1

	; /* Shift the history: xmm4 takes xmm3 now, xmm3 takes xmm5 below.      */
	movdqa	xmm4, xmm3
	; /* Two independent 64-bit additions (no carry from r8 into r11).       */
	add	r8, rdx
	add	r11, rax
	; /* Store the sums at the second address, then xor in rsi and the       */
	; /* original high qword (r13) to form the next iteration's r8 / r11.    */
	mov	QWORD PTR [r12], r8
	xor	r8, rsi
	mov	QWORD PTR [r12+8], r11
	; /* Offset for the next iteration.                                      */
	mov	r10, r8
	xor	r11, r13
	and	r10d, 2097136
	movdqa	xmm3, xmm5
	dec	ebp
	jne	main_loop_ryzen

	; /* ------------------------------------------------------------------ */
	; /* Epilogue: undo the prologue, then jump over the out-of-line         */
	; /* sqrt_fixup_ryzen code to the end label. No ret is executed here.    */
	; /* NOTE(review): presumably the file that includes this fragment       */
	; /* supplies the return instruction - confirm.                          */
	; /* ------------------------------------------------------------------ */
	; /* Restore the MXCSR value saved by stmxcsr in the prologue.           */
	ldmxcsr DWORD PTR [rsp]
	movaps	xmm6, XMMWORD PTR [rsp+48]
	; /* r11 = rsp + 64 = stack pointer before the 64-byte scratch area was  */
	; /* allocated. The five pushed registers occupy 40 bytes above it, so   */
	; /* r11+56 / +64 / +72 are the entry-rsp+16 / +24 / +32 slots where     */
	; /* rbx, rbp and rsi were stored.                                       */
	lea	r11, QWORD PTR [rsp+64]
	mov	rbx, QWORD PTR [r11+56]
	mov	rbp, QWORD PTR [r11+64]
	mov	rsi, QWORD PTR [r11+72]
	; /* [r11-48] is the same address as [rsp+16], the xmm8 spill slot.      */
	movaps	xmm8, XMMWORD PTR [r11-48]
	movaps	xmm7, XMMWORD PTR [rsp+32]
	; /* Release the scratch area, then pop in reverse push order.           */
	mov	rsp, r11
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rdi
	jmp cnv2_main_loop_ryzen_endp

; /* -------------------------------------------------------------------- */
; /* Out-of-line fix-up for the square root, entered from the main loop    */
; /* when the low 19 bits of the raw sqrtsd result are all zero.           */
; /*                                                                       */
; /* In:   rdi  = raw bit pattern of the sqrtsd result                     */
; /*       xmm2 = square-root input n in its low qword                     */
; /* Out:  rdi  = corrected result; control returns to                     */
; /*              sqrt_fixup_ryzen_ret                                     */
; /* Uses: rax, rcx, rdx, r9 and flags as scratch; the main loop reloads   */
; /*       all four registers after the return label before reading them.  */
; /*                                                                       */
; /* With K = 1022 << 32, x = rdi - 1, r = x >> 19, s = x >> 20:           */
; /*       p   = (r - s - K + 1) * (s - K)      (low 64 bits)              */
; /*       rdi = r + (p < n ? 1 : 0)            (unsigned compare)         */
; /* -------------------------------------------------------------------- */
sqrt_fixup_ryzen:
	; /* r9 = n */
	movq r9, xmm2
	dec	rdi
	; /* rdx = -K: the 32-bit move zero-extends 0xFFFFFC02, the shift then   */
	; /* yields 0xFFFFFC0200000000 = -(1022 << 32) modulo 2^64.              */
	mov edx, -1022
	shl rdx, 32
	mov	rax, rdi
	; /* rdi = r, rax = s */
	shr	rdi, 19
	shr	rax, 20
	mov	rcx, rdi
	sub	rcx, rax
	; /* rcx = r - s - K + 1 (lea leaves the flags untouched). */
	lea	rcx, [rcx+rdx+1]
	; /* rax = s - K */
	add	rax, rdx
	imul	rcx, rax
	; /* The subtraction sets CF exactly when p < n; adc then adds that      */
	; /* borrow to r.                                                        */
	sub	rcx, r9
	adc	rdi, 0
	jmp	sqrt_fixup_ryzen_ret

; /* End of the fragment; the epilogue jumps here to skip the fix-up code. */
cnv2_main_loop_ryzen_endp:
